{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "import pickle\n",
    "import jsonlines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the four pickled DBLP lookup tables.\n",
    "# id2title_dict / id2author_dict turn ids into display strings for the question templates;\n",
    "# title2id_dict / author2id_dict are presumably the inverse maps -- TODO confirm.\n",
    "# NOTE: pickle can execute arbitrary code while loading; only point these paths at trusted files.\n",
    "with open(\"/<YOUR_OWN_PATH>/ToolQA/data/external_corpus/dblp/title2id_dict.pkl\", mode=\"rb\") as title2id_file:\n",
    "    title2id_dict = pickle.load(title2id_file)\n",
    "with open(\"/<YOUR_OWN_PATH>/ToolQA/data/external_corpus/dblp/author2id_dict.pkl\", mode=\"rb\") as author2id_file:\n",
    "    author2id_dict = pickle.load(author2id_file)\n",
    "with open(\"/<YOUR_OWN_PATH>/ToolQA/data/external_corpus/dblp/id2title_dict.pkl\", mode=\"rb\") as id2title_file:\n",
    "    id2title_dict = pickle.load(id2title_file)\n",
    "with open(\"/<YOUR_OWN_PATH>/ToolQA/data/external_corpus/dblp/id2author_dict.pkl\", mode=\"rb\") as id2author_file:\n",
    "    id2author_dict = pickle.load(id2author_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the two pickled graphs used by the question templates:\n",
    "#   paper_net  -- paper nodes; the templates read its out-/in-degree as citation counts\n",
    "#   author_net -- author nodes; the templates read its degree as the number of collaborators\n",
    "# NOTE: pickle can execute arbitrary code while loading; only point these paths at trusted files.\n",
    "with open('/<YOUR_OWN_PATH>/ToolQA/data/external_corpus/dblp/paper_net.pkl', mode='rb') as paper_net_file:\n",
    "    paper_net = pickle.load(paper_net_file)\n",
    "\n",
    "with open('/<YOUR_OWN_PATH>/ToolQA/data/external_corpus/dblp/author_net.pkl', mode='rb') as author_net_file:\n",
    "    author_net = pickle.load(author_net_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_question_per_template = 10\n",
    "question_id = 0\n",
    "questions = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0000', 'question': 'Who are the authors of Time to Leak: Cross-Device Timing Attack On Edge Deep Learning Accelerator?', 'answer': 'Yoo-Seung Won, Soham Chatterjee, Dirmanto Jap, Shivam Bhasin, Arindam Basu'}, {'qid': 'easy-dblp-0001', 'question': 'Who are the authors of Intrusion detection in networks using cuckoo search optimization?', 'answer': 'Imran  Muhammad, Khan  Sangeen, Hlavacs  Helmut, Khan  Fakhri Alam, Anwar  Sajid'}, {'qid': 'easy-dblp-0002', 'question': 'Who are the authors of Visionary automation of sack handling and emptying?', 'answer': 'Kavoussanos, M., Pouliezos, A.'}, {'qid': 'easy-dblp-0003', 'question': 'Who are the authors of Environment mapping and other applications of world projections?', 'answer': 'Ned Greene'}, {'qid': 'easy-dblp-0004', 'question': 'Who are the authors of Administrative scope: A foundation for role-based administrative models?', 'answer': 'Jason Crampton, George Loizou'}, {'qid': 'easy-dblp-0005', 'question': 'Who are the authors of Operational prototyping: a new development approach?', 'answer': 'Alan M. 
Davis'}, {'qid': 'easy-dblp-0006', 'question': 'Who are the authors of A software tool for human-robot shared-workspace collaboration with task precedence constraints?', 'answer': 'Terrin Babu Pulikottil, Stefania Pellegrinelli, Nicola Pedrocchi'}, {'qid': 'easy-dblp-0007', 'question': 'Who are the authors of Structured Prediction For Crisp Inverse Kinematics Learning With Misspecified Robot Models?', 'answer': 'Gian Maria Marconi, Rafaello Camoriano, Lorenzo Rosasco, Carlo Ciliberto'}, {'qid': 'easy-dblp-0008', 'question': 'Who are the authors of Design of an Autonomous Latching System for Surface Vessels?', 'answer': 'David Fernández-Gutiérrez, Niklas Hagemann, Wei Wang, Rens Doornbusch, Joshua Jordan, Jonathan Schiphorst, Pietro Leoni, Fabio Duarte, Carlo Ratti, Daniela Rus'}, {'qid': 'easy-dblp-0009', 'question': 'Who are the authors of Utilizing a Non-Motor Symptoms Questionnaire and Machine Learning to Differentiate Movement Disorders.?', 'answer': 'Alexander Brenner, Lucas Plagwitz, Michael Fujarski, Tobias Warnecke, Julian Varghese'}]\n"
     ]
    }
   ],
   "source": [
    "# Who are the authors of {paper_title}?\n",
    "# Materialise the node list once: rebuilding it for every draw costs O(#papers) per sample.\n",
    "paper_node_ids = list(paper_net.nodes())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = \"\"\n",
    "    # Resample until the paper yields at least one author name, so no question has an empty answer.\n",
    "    while answer == \"\":\n",
    "        paper_id = random.choice(paper_node_ids)\n",
    "        paper_title = paper_net.nodes[paper_id][\"title\"]\n",
    "        question = \"Who are the authors of \" + paper_title + \"?\"\n",
    "        authors = paper_net.nodes[paper_id][\"authors\"]\n",
    "        # each author entry is a mapping with a 'name' field; the answer is the comma-joined names\n",
    "        answer = \", \".join([author['name'] for author in authors])\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0010', 'question': 'What organization is Bradley E. Rucker from?', 'answer': 'Computer Science University of Dayton, United States'}, {'qid': 'easy-dblp-0011', 'question': 'What organization is Ruben Puche-Panadero from?', 'answer': 'Univ Politecn Valencia, Inst Energy Engn, Camino Vera S-N, Valencia 46022, Spain'}, {'qid': 'easy-dblp-0012', 'question': 'What organization is Sarat Chandra Swain from?', 'answer': 'KIIT DU, Dept Elect Engn, Bhubaneswar, India'}, {'qid': 'easy-dblp-0013', 'question': 'What organization is Hongtao Su from?', 'answer': 'Xidian Univ, Natl Lab Radar Signal Proc, Xian 710071, Peoples R China'}, {'qid': 'easy-dblp-0014', 'question': 'What organization is Junru Jin from?', 'answer': 'Shandong Univ, Sch Software, Jinan 250101, Peoples R China'}, {'qid': 'easy-dblp-0015', 'question': 'What organization is Allison Woodruff from?', 'answer': 'Google, Mountain View, CA 94043 USA'}, {'qid': 'easy-dblp-0016', 'question': 'What organization is Allison C. Tam from?', 'answer': 'DeepMind'}, {'qid': 'easy-dblp-0017', 'question': 'What organization is Eric C Porges from?', 'answer': 'Department of Clinical and Health Psychology, Center for Cognitive Aging and Memory, and McKnight Brain Research Foundation, University of Florida, Gainesville, FL, USA'}, {'qid': 'easy-dblp-0018', 'question': 'What organization is Shengdong Zhang from?', 'answer': 'School of Electronic and Computer Engineering, Shenzhen Graduate School, Peking University, Shenzhen, China'}, {'qid': 'easy-dblp-0019', 'question': 'What organization is Zhanchao Li from?', 'answer': 'Guangdong Pharmaceut Univ, Sch Chem & Chem Engn, Guangzhou 510006, Peoples R China'}]\n"
     ]
    }
   ],
   "source": [
    "# What organization is {author_name} from?\n",
    "# Materialise the node list once: rebuilding it on every (re)draw costs O(#authors) per attempt.\n",
    "author_node_ids = list(author_net.nodes())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = \"\"\n",
    "    # Resample until the author has a non-empty organization.\n",
    "    while not answer:\n",
    "        author_id = random.choice(author_node_ids)\n",
    "        author_name = id2author_dict[author_id]\n",
    "        question = \"What organization is \" + author_name + \" from?\"\n",
    "        # .get(): a node without an 'org' attribute is redrawn instead of raising KeyError\n",
    "        answer = author_net.nodes[author_id].get(\"org\", \"\")\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "How many pages is Turbocharging Treewidth-Bounded Bayesian Network Structure Learning? 9\n",
      "How many pages is Smoothing Methods for Nonlinear Complementarity Problems.? 19\n",
      "How many pages is Construction of an efficient multivariate dynamic screening system.? 13\n",
      "How many pages is End-to-End Learning of OFDM Waveforms with PAPR and ACLR Constraints? 6\n",
      "How many pages is Recommending API Usages for Mobile Apps with Hidden Markov Model.? 6\n",
      "How many pages is Finding a Shortest Odd Hole? 21\n",
      "How many pages is A Multinomial Naïve Bayesian (MNB) Network to Automatically Recommend Topics for GitHub Repositories.? 10\n",
      "How many pages is Pole-Zero Cancellation Speed Control With Variable Current Cut-Off Frequency for Servo Motors? 8\n",
      "How many pages is Refactoring Java Monoliths into Executable Microservice-Based Applications.? 8\n",
      "How many pages is A data reduction scheme for triangulated surfaces? 18\n",
      "[{'qid': 'easy-dblp-0020', 'question': 'How many pages is Turbocharging Treewidth-Bounded Bayesian Network Structure Learning?', 'answer': 9}, {'qid': 'easy-dblp-0021', 'question': 'How many pages is Smoothing Methods for Nonlinear Complementarity Problems.?', 'answer': 19}, {'qid': 'easy-dblp-0022', 'question': 'How many pages is Construction of an efficient multivariate dynamic screening system.?', 'answer': 13}, {'qid': 'easy-dblp-0023', 'question': 'How many pages is End-to-End Learning of OFDM Waveforms with PAPR and ACLR Constraints?', 'answer': 6}, {'qid': 'easy-dblp-0024', 'question': 'How many pages is Recommending API Usages for Mobile Apps with Hidden Markov Model.?', 'answer': 6}, {'qid': 'easy-dblp-0025', 'question': 'How many pages is Finding a Shortest Odd Hole?', 'answer': 21}, {'qid': 'easy-dblp-0026', 'question': 'How many pages is A Multinomial Naïve Bayesian (MNB) Network to Automatically Recommend Topics for GitHub Repositories.?', 'answer': 10}, {'qid': 'easy-dblp-0027', 'question': 'How many pages is Pole-Zero Cancellation Speed Control With Variable Current Cut-Off Frequency for Servo Motors?', 'answer': 8}, {'qid': 'easy-dblp-0028', 'question': 'How many pages is Refactoring Java Monoliths into Executable Microservice-Based Applications.?', 'answer': 8}, {'qid': 'easy-dblp-0029', 'question': 'How many pages is A data reduction scheme for triangulated surfaces?', 'answer': 18}]\n"
     ]
    }
   ],
   "source": [
    "# How many pages is {paper_title}?\n",
    "# Materialise the node list once: rebuilding it on every (re)draw costs O(#papers) per attempt.\n",
    "paper_node_ids = list(paper_net.nodes())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = 0\n",
    "    # Resample until both page bounds are present, numeric, and give a positive page count.\n",
    "    while answer <= 0:\n",
    "        paper_id = random.choice(paper_node_ids)\n",
    "        paper_attrs = paper_net.nodes[paper_id]\n",
    "        paper_title = paper_attrs[\"title\"]\n",
    "        question = \"How many pages is \" + paper_title + \"?\"\n",
    "        answer = 0\n",
    "        if paper_attrs[\"page_end\"] != '' and paper_attrs[\"page_start\"] != '':\n",
    "            try:\n",
    "                # inclusive range: pages 5..9 -> 5 pages\n",
    "                answer = int(float(paper_attrs[\"page_end\"])) - int(float(paper_attrs[\"page_start\"])) + 1\n",
    "            except (TypeError, ValueError, OverflowError):\n",
    "                # page value is not a usable number: treat the paper as unusable and redraw\n",
    "                answer = 0\n",
    "    print(question, answer)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0030', 'question': 'How many papers did Rate-Distortion Optimal Transform Coefficient Selection for Unoccupied Regions in Video-Based Point Cloud Compression cite?', 'answer': 17}, {'qid': 'easy-dblp-0031', 'question': 'How many papers did A method to include reservoir operations in catchment hydrological models using SHETRAN cite?', 'answer': 5}, {'qid': 'easy-dblp-0032', 'question': 'How many papers did GPU-aware resource management in heterogeneous cloud data centers cite?', 'answer': 5}, {'qid': 'easy-dblp-0033', 'question': 'How many papers did High order discretization methods for spatial-dependent epidemic models cite?', 'answer': 4}, {'qid': 'easy-dblp-0034', 'question': 'How many papers did Performance-Guaranteed Fault-Tolerant Control for Uncertain Nonlinear Systems via Learning-Based Switching Scheme cite?', 'answer': 25}, {'qid': 'easy-dblp-0035', 'question': 'How many papers did Mean-Based Trace Reconstruction Over Oblivious Synchronization Channels cite?', 'answer': 15}, {'qid': 'easy-dblp-0036', 'question': 'How many papers did Contextual Documentation Referencing on Stack Overflow cite?', 'answer': 19}, {'qid': 'easy-dblp-0037', 'question': 'How many papers did Toward Predicting Stay Time for Private Car Users: A RNN-NALU Approach cite?', 'answer': 20}, {'qid': 'easy-dblp-0038', 'question': 'How many papers did A stacked deep learning approach to cyber-attacks detection in industrial systems: application to power system and gas pipeline systems cite?', 'answer': 20}, {'qid': 'easy-dblp-0039', 'question': 'How many papers did Software fault classification using extreme learning machine: a cognitive approach cite?', 'answer': 28}]\n"
     ]
    }
   ],
   "source": [
    "# How many papers did {paper_title} cite?\n",
    "# Materialise the candidate ids once: rebuilding the list on every (re)draw costs O(#papers) per attempt.\n",
    "candidate_paper_ids = list(id2title_dict.keys())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = 0\n",
    "    # Resample until the paper has at least one outgoing edge in paper_net.\n",
    "    while answer == 0:\n",
    "        # NOTE(review): ids are drawn from id2title_dict but looked up in paper_net;\n",
    "        # this assumes every id is also a paper_net node -- confirm.\n",
    "        paper_id = random.choice(candidate_paper_ids)\n",
    "        paper_title = paper_net.nodes[paper_id][\"title\"]\n",
    "        question = \"How many papers did \" + paper_title + \" cite?\"\n",
    "        # out-degree is used as the number of papers this paper cites\n",
    "        answer = paper_net.out_degree(paper_id)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0040', 'question': 'How many papers in the DBLP citation network cited A Data Aggregation Based Approach To Exploit Dynamic Spatio-Temporal Correlations For Citywide Crowd Flows Prediction In Fog Computing?', 'answer': 8}, {'qid': 'easy-dblp-0041', 'question': 'How many papers in the DBLP citation network cited Contrastive Adversarial Domain Adaptation for Machine Remaining Useful Life Prediction?', 'answer': 1}, {'qid': 'easy-dblp-0042', 'question': 'How many papers in the DBLP citation network cited Distributed Attack Detection in a Water Treatment Plant: Method and Case Study?', 'answer': 1}, {'qid': 'easy-dblp-0043', 'question': 'How many papers in the DBLP citation network cited Online and Unsupervised Anomaly Detection for Streaming Data Using an Array of Sliding Windows and PDDs?', 'answer': 1}, {'qid': 'easy-dblp-0044', 'question': 'How many papers in the DBLP citation network cited A novel gray wolf optimizer with RNA crossover operation for tackling the non-parametric modeling problem of FCC process?', 'answer': 1}, {'qid': 'easy-dblp-0045', 'question': 'How many papers in the DBLP citation network cited A Grouping Differential Evolution Algorithm Boosted by Attraction and Repulsion Strategies for Masi Entropy-Based Multi-Level Image Segmentation?', 'answer': 1}, {'qid': 'easy-dblp-0046', 'question': 'How many papers in the DBLP citation network cited AntiSense: Standard-compliant CSI obfuscation against unauthorized Wi-Fi sensing?', 'answer': 2}, {'qid': 'easy-dblp-0047', 'question': 'How many papers in the DBLP citation network cited End-To-End Computer Vision Framework: An Open-Source Platform For Research And Education?', 'answer': 1}, {'qid': 'easy-dblp-0048', 'question': 'How many papers in the DBLP citation network cited Contractible Edges In K-Connected Graphs With Minimum Degree Greater Than Or Equal To Left Perpendicular 3k-1/2 Right Perpendicular?', 'answer': 1}, {'qid': 'easy-dblp-0049', 'question': 'How many papers 
in the DBLP citation network cited Analytical modeling for signaling‐based DRX in 5G communication?', 'answer': 1}]\n"
     ]
    }
   ],
   "source": [
    "# How many papers in the DBLP citation network cited {paper_title}?\n",
    "# Materialise the candidate ids once: rebuilding the list on every (re)draw costs O(#papers) per attempt.\n",
    "candidate_paper_ids = list(id2title_dict.keys())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = 0\n",
    "    # Resample until the paper has at least one incoming edge in paper_net.\n",
    "    while answer == 0:\n",
    "        # NOTE(review): ids are drawn from id2title_dict but looked up in paper_net;\n",
    "        # this assumes every id is also a paper_net node -- confirm.\n",
    "        paper_id = random.choice(candidate_paper_ids)\n",
    "        paper_title = paper_net.nodes[paper_id][\"title\"]\n",
    "        question = \"How many papers in the DBLP citation network cited \" + paper_title + \"?\"\n",
    "        # in-degree is used as the number of papers in the network citing this paper\n",
    "        answer = paper_net.in_degree(paper_id)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0050', 'question': 'How many collaborators does Seoyoung Ahn have in the DBLP citation network?', 'answer': 7}, {'qid': 'easy-dblp-0051', 'question': 'How many collaborators does Bohyung Han have in the DBLP citation network?', 'answer': 2}, {'qid': 'easy-dblp-0052', 'question': 'How many collaborators does Gözel Shakeri have in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0053', 'question': 'How many collaborators does Huijie Peng have in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0054', 'question': 'How many collaborators does Andrew D. Ferguson have in the DBLP citation network?', 'answer': 6}, {'qid': 'easy-dblp-0055', 'question': 'How many collaborators does Cliff Young have in the DBLP citation network?', 'answer': 8}, {'qid': 'easy-dblp-0056', 'question': 'How many collaborators does Nicholas J. Bryan have in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0057', 'question': 'How many collaborators does Zhonghua Zheng have in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0058', 'question': 'How many collaborators does Minsu Ko have in the DBLP citation network?', 'answer': 6}, {'qid': 'easy-dblp-0059', 'question': 'How many collaborators does Jan Mandel have in the DBLP citation network?', 'answer': 3}]\n"
     ]
    }
   ],
   "source": [
    "# How many collaborators does {author_name} have in the DBLP citation network?\n",
    "# Materialise the candidate ids once: rebuilding the list on every (re)draw costs O(#authors) per attempt.\n",
    "candidate_author_ids = list(id2author_dict.keys())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = 0\n",
    "    # Resample until the author has at least one edge in author_net.\n",
    "    while answer == 0:\n",
    "        author_id = random.choice(candidate_author_ids)\n",
    "        author_name = id2author_dict[author_id]\n",
    "        question = \"How many collaborators does \" + author_name + \" have in the DBLP citation network?\"\n",
    "        # the node degree in author_net is used as the collaborator count\n",
    "        answer = author_net.degree(author_id)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0060', 'question': 'How many papers did K Zheng and Q Zhou write together in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0061', 'question': 'How many papers did Payam Ghassemi and Amir Behjat write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0062', 'question': 'How many papers did Tai-Quan Peng and Lun Zhang write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0063', 'question': 'How many papers did Joachim von Zitzewitz and Grégoire Courtine write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0064', 'question': 'How many papers did Jonathan Tremblay and Thomas Müller write together in the DBLP citation network?', 'answer': 5}, {'qid': 'easy-dblp-0065', 'question': 'How many papers did Sarah Fakhoury and Cole S. Peterson write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0066', 'question': 'How many papers did Jingqin Wang and Jingtao Wang write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0067', 'question': 'How many papers did Seungyong Moon and Hyun Oh Song write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0068', 'question': 'How many papers did Mansouri Farnam and Hamid R. Rabiee write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0069', 'question': 'How many papers did Roger Gassert and Lambercy Olivier write together in the DBLP citation network?', 'answer': 4}]\n"
     ]
    }
   ],
   "source": [
    "# How many papers did {author1} and {author2} write together in the DBLP citation network?\n",
    "# Materialise the candidate ids once: rebuilding the list on every (re)draw costs O(#authors) per attempt.\n",
    "candidate_author_ids = list(id2author_dict.keys())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = 0\n",
    "    # Resample until we hold a pair of co-authors with at least one joint paper.\n",
    "    while answer == 0:\n",
    "        author_id1 = random.choice(candidate_author_ids)\n",
    "        coauthor_ids = list(author_net.neighbors(author_id1))\n",
    "        if not coauthor_ids:\n",
    "            # No co-authors: random.choice([]) would raise IndexError, so draw another author.\n",
    "            continue\n",
    "        author_name1 = id2author_dict[author_id1]\n",
    "        author_id2 = random.choice(coauthor_ids)\n",
    "        author_name2 = id2author_dict[author_id2]\n",
    "        question = \"How many papers did \" + author_name1 + \" and \" + author_name2 + \" write together in the DBLP citation network?\"\n",
    "        # The joint papers are stored on the co-authorship edge itself. Intersecting the two\n",
    "        # neighbour sets would count shared collaborators, not shared papers.\n",
    "        # set(): count each paper id once even if the edge list repeats it.\n",
    "        answer = len(set(author_net[author_id1][author_id2][\"papers\"]))\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0070', 'question': 'What papers did Annahita Amireskandari write in the DBLP citation network?', 'answer': 'Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, MultiSDGAN: Translation of OCT Images to Superresolved Segmentation Labels Using Multi-Discriminators in Multi-Stages, Gan-Based Super-Resolution and Segmentation of Retinal Layers in Optical Coherence Tomography Scans, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, MultiSDGAN: Translation of OCT Images to Superresolved Segmentation Labels Using Multi-Discriminators in Multi-Stages, Gan-Based Super-Resolution and Segmentation of Retinal Layers in Optical Coherence Tomography Scans, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, MultiSDGAN: Translation of OCT Images to Superresolved Segmentation Labels Using Multi-Discriminators in Multi-Stages, Gan-Based Super-Resolution and Segmentation of Retinal Layers in Optical Coherence Tomography Scans'}, {'qid': 'easy-dblp-0071', 'question': 'What papers did Yan Zhang write in the DBLP citation network?', 'answer': 'Learning Motion Priors for 4D Human Body Capture in 3D Scenes., Learning Motion Priors for 4D Human Body Capture in 3D Scenes., Learning Motion Priors for 4D Human Body Capture in 3D Scenes., Learning Motion Priors for 4D Human Body Capture in 3D Scenes.'}, {'qid': 'easy-dblp-0072', 'question': 'What papers did Amine Ihamouten write in the DBLP citation network?', 'answer': 'Linking Degree Of Saturation With The Complex Dielectric Permittivity Of Limestone In A Gpr Frequency Band Using Svr, Linking Degree Of Saturation With The Complex Dielectric Permittivity Of 
Limestone In A Gpr Frequency Band Using Svr, Linking Degree Of Saturation With The Complex Dielectric Permittivity Of Limestone In A Gpr Frequency Band Using Svr'}, {'qid': 'easy-dblp-0073', 'question': 'What papers did Stefano Aldini write in the DBLP citation network?', 'answer': 'Computational Model of Robot Trust in Human Co-Worker for Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration'}, {'qid': 'easy-dblp-0074', 'question': 'What papers did Jun Zeng write in the DBLP citation network?', 'answer': 'Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Bayesian Optimization Meets Hybrid Zero Dynamics: Safe Parameter Learning for Bipedal Locomotion Control., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Bayesian Optimization Meets Hybrid Zero Dynamics: Safe Parameter Learning for Bipedal Locomotion Control., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Bayesian Optimization Meets Hybrid Zero Dynamics: Safe Parameter Learning for Bipedal Locomotion Control.'}, {'qid': 'easy-dblp-0075', 'question': 'What papers did Dongdong Mu write in the DBLP citation network?', 'answer': 'Trajectory 
Tracking Control For Underactuated Unmanned Surface Vehicle Subject To Uncertain Dynamics And Input Saturation, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy'}, {'qid': 'easy-dblp-0076', 'question': 'What papers did Alessandro Brighente write in the DBLP citation network?', 'answer': 'Adaptive Coordinated Random Access For MTC With Correlated Traffic and Data Freshness, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications'}, {'qid': 'easy-dblp-0077', 'question': 'What papers did Christopher Yau write in the DBLP citation network?', 'answer': 'Basisdevae: Interpretable Simultaneous Dimensionality Reduction And Feature-Level Clustering With Derivative-Based Variational Autoencoders'}, {'qid': 'easy-dblp-0078', 'question': 'What papers did Veera Rajendran write in the DBLP citation network?', 'answer': 'Deep Learning based Crop Row Detection with Online Domain Adaptation, Deep Learning based Crop Row Detection with Online Domain Adaptation, Deep Learning based Crop Row Detection with Online Domain Adaptation'}, {'qid': 'easy-dblp-0079', 'question': 'What papers did Chenlu Shen write in the DBLP citation network?', 'answer': 'Generating Knowledge-Based Attentive User Representations for Sparse 
Interaction Recommendation, Generating Knowledge-Based Attentive User Representations for Sparse Interaction Recommendation'}]\n"
     ]
    }
   ],
   "source": [
    "# What papers did {author_name} write in the DBLP citation network?\n",
    "# Materialise the candidate ids once: rebuilding the list on every (re)draw costs O(#authors) per attempt.\n",
    "candidate_author_ids = list(id2author_dict.keys())\n",
    "for i in range(num_question_per_template):\n",
    "    answer = \"\"\n",
    "    # Resample until the author has at least one paper recorded on a co-authorship edge.\n",
    "    while answer == \"\":\n",
    "        author_id = random.choice(candidate_author_ids)\n",
    "        author_name = id2author_dict[author_id]\n",
    "        question = \"What papers did \" + author_name + \" write in the DBLP citation network?\"\n",
    "        # Gather the paper ids from every co-authorship edge of this author.\n",
    "        # NOTE(review): only edges are scanned, so a paper with no co-author edge\n",
    "        # (e.g. single-author work) presumably never appears here -- confirm.\n",
    "        written_paper_ids = []\n",
    "        for neighbour_id in author_net.neighbors(author_id):\n",
    "            written_paper_ids.extend(author_net[author_id][neighbour_id][\"papers\"])\n",
    "        # A paper with k co-authors sits on k edges; dict.fromkeys drops the repeats\n",
    "        # while keeping first-seen order, so each paper is listed once.\n",
    "        papers = [id2title_dict[paper] for paper in dict.fromkeys(written_paper_ids)]\n",
    "        answer = \", \".join(papers)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0010', 'question': 'What organization is Bradley E. Rucker from?', 'answer': 'Computer Science University of Dayton, United States'}, {'qid': 'easy-dblp-0011', 'question': 'What organization is Ruben Puche-Panadero from?', 'answer': 'Univ Politecn Valencia, Inst Energy Engn, Camino Vera S-N, Valencia 46022, Spain'}, {'qid': 'easy-dblp-0012', 'question': 'What organization is Sarat Chandra Swain from?', 'answer': 'KIIT DU, Dept Elect Engn, Bhubaneswar, India'}, {'qid': 'easy-dblp-0013', 'question': 'What organization is Hongtao Su from?', 'answer': 'Xidian Univ, Natl Lab Radar Signal Proc, Xian 710071, Peoples R China'}, {'qid': 'easy-dblp-0014', 'question': 'What organization is Junru Jin from?', 'answer': 'Shandong Univ, Sch Software, Jinan 250101, Peoples R China'}, {'qid': 'easy-dblp-0015', 'question': 'What organization is Allison Woodruff from?', 'answer': 'Google, Mountain View, CA 94043 USA'}, {'qid': 'easy-dblp-0016', 'question': 'What organization is Allison C. 
Tam from?', 'answer': 'DeepMind'}, {'qid': 'easy-dblp-0017', 'question': 'What organization is Eric C Porges from?', 'answer': 'Department of Clinical and Health Psychology, Center for Cognitive Aging and Memory, and McKnight Brain Research Foundation, University of Florida, Gainesville, FL, USA'}, {'qid': 'easy-dblp-0018', 'question': 'What organization is Shengdong Zhang from?', 'answer': 'School of Electronic and Computer Engineering, Shenzhen Graduate School, Peking University, Shenzhen, China'}, {'qid': 'easy-dblp-0019', 'question': 'What organization is Zhanchao Li from?', 'answer': 'Guangdong Pharmaceut Univ, Sch Chem & Chem Engn, Guangzhou 510006, Peoples R China'}, {'qid': 'easy-dblp-0020', 'question': 'How many pages is Turbocharging Treewidth-Bounded Bayesian Network Structure Learning?', 'answer': 9}, {'qid': 'easy-dblp-0021', 'question': 'How many pages is Smoothing Methods for Nonlinear Complementarity Problems.?', 'answer': 19}, {'qid': 'easy-dblp-0022', 'question': 'How many pages is Construction of an efficient multivariate dynamic screening system.?', 'answer': 13}, {'qid': 'easy-dblp-0023', 'question': 'How many pages is End-to-End Learning of OFDM Waveforms with PAPR and ACLR Constraints?', 'answer': 6}, {'qid': 'easy-dblp-0024', 'question': 'How many pages is Recommending API Usages for Mobile Apps with Hidden Markov Model.?', 'answer': 6}, {'qid': 'easy-dblp-0025', 'question': 'How many pages is Finding a Shortest Odd Hole?', 'answer': 21}, {'qid': 'easy-dblp-0026', 'question': 'How many pages is A Multinomial Naïve Bayesian (MNB) Network to Automatically Recommend Topics for GitHub Repositories.?', 'answer': 10}, {'qid': 'easy-dblp-0027', 'question': 'How many pages is Pole-Zero Cancellation Speed Control With Variable Current Cut-Off Frequency for Servo Motors?', 'answer': 8}, {'qid': 'easy-dblp-0028', 'question': 'How many pages is Refactoring Java Monoliths into Executable Microservice-Based Applications.?', 'answer': 8}, {'qid': 
'easy-dblp-0029', 'question': 'How many pages is A data reduction scheme for triangulated surfaces?', 'answer': 18}, {'qid': 'easy-dblp-0030', 'question': 'How many papers did Rate-Distortion Optimal Transform Coefficient Selection for Unoccupied Regions in Video-Based Point Cloud Compression cite?', 'answer': 17}, {'qid': 'easy-dblp-0031', 'question': 'How many papers did A method to include reservoir operations in catchment hydrological models using SHETRAN cite?', 'answer': 5}, {'qid': 'easy-dblp-0032', 'question': 'How many papers did GPU-aware resource management in heterogeneous cloud data centers cite?', 'answer': 5}, {'qid': 'easy-dblp-0033', 'question': 'How many papers did High order discretization methods for spatial-dependent epidemic models cite?', 'answer': 4}, {'qid': 'easy-dblp-0034', 'question': 'How many papers did Performance-Guaranteed Fault-Tolerant Control for Uncertain Nonlinear Systems via Learning-Based Switching Scheme cite?', 'answer': 25}, {'qid': 'easy-dblp-0035', 'question': 'How many papers did Mean-Based Trace Reconstruction Over Oblivious Synchronization Channels cite?', 'answer': 15}, {'qid': 'easy-dblp-0036', 'question': 'How many papers did Contextual Documentation Referencing on Stack Overflow cite?', 'answer': 19}, {'qid': 'easy-dblp-0037', 'question': 'How many papers did Toward Predicting Stay Time for Private Car Users: A RNN-NALU Approach cite?', 'answer': 20}, {'qid': 'easy-dblp-0038', 'question': 'How many papers did A stacked deep learning approach to cyber-attacks detection in industrial systems: application to power system and gas pipeline systems cite?', 'answer': 20}, {'qid': 'easy-dblp-0039', 'question': 'How many papers did Software fault classification using extreme learning machine: a cognitive approach cite?', 'answer': 28}, {'qid': 'easy-dblp-0040', 'question': 'How many papers in the DBLP citation network cited A Data Aggregation Based Approach To Exploit Dynamic Spatio-Temporal Correlations For Citywide Crowd 
Flows Prediction In Fog Computing?', 'answer': 8}, {'qid': 'easy-dblp-0041', 'question': 'How many papers in the DBLP citation network cited Contrastive Adversarial Domain Adaptation for Machine Remaining Useful Life Prediction?', 'answer': 1}, {'qid': 'easy-dblp-0042', 'question': 'How many papers in the DBLP citation network cited Distributed Attack Detection in a Water Treatment Plant: Method and Case Study?', 'answer': 1}, {'qid': 'easy-dblp-0043', 'question': 'How many papers in the DBLP citation network cited Online and Unsupervised Anomaly Detection for Streaming Data Using an Array of Sliding Windows and PDDs?', 'answer': 1}, {'qid': 'easy-dblp-0044', 'question': 'How many papers in the DBLP citation network cited A novel gray wolf optimizer with RNA crossover operation for tackling the non-parametric modeling problem of FCC process?', 'answer': 1}, {'qid': 'easy-dblp-0045', 'question': 'How many papers in the DBLP citation network cited A Grouping Differential Evolution Algorithm Boosted by Attraction and Repulsion Strategies for Masi Entropy-Based Multi-Level Image Segmentation?', 'answer': 1}, {'qid': 'easy-dblp-0046', 'question': 'How many papers in the DBLP citation network cited AntiSense: Standard-compliant CSI obfuscation against unauthorized Wi-Fi sensing?', 'answer': 2}, {'qid': 'easy-dblp-0047', 'question': 'How many papers in the DBLP citation network cited End-To-End Computer Vision Framework: An Open-Source Platform For Research And Education?', 'answer': 1}, {'qid': 'easy-dblp-0048', 'question': 'How many papers in the DBLP citation network cited Contractible Edges In K-Connected Graphs With Minimum Degree Greater Than Or Equal To Left Perpendicular 3k-1/2 Right Perpendicular?', 'answer': 1}, {'qid': 'easy-dblp-0049', 'question': 'How many papers in the DBLP citation network cited Analytical modeling for signaling‐based DRX in 5G communication?', 'answer': 1}, {'qid': 'easy-dblp-0050', 'question': 'How many collaborators does Seoyoung Ahn 
have in the DBLP citation network?', 'answer': 7}, {'qid': 'easy-dblp-0051', 'question': 'How many collaborators does Bohyung Han have in the DBLP citation network?', 'answer': 2}, {'qid': 'easy-dblp-0052', 'question': 'How many collaborators does Gözel Shakeri have in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0053', 'question': 'How many collaborators does Huijie Peng have in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0054', 'question': 'How many collaborators does Andrew D. Ferguson have in the DBLP citation network?', 'answer': 6}, {'qid': 'easy-dblp-0055', 'question': 'How many collaborators does Cliff Young have in the DBLP citation network?', 'answer': 8}, {'qid': 'easy-dblp-0056', 'question': 'How many collaborators does Nicholas J. Bryan have in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0057', 'question': 'How many collaborators does Zhonghua Zheng have in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0058', 'question': 'How many collaborators does Minsu Ko have in the DBLP citation network?', 'answer': 6}, {'qid': 'easy-dblp-0059', 'question': 'How many collaborators does Jan Mandel have in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0060', 'question': 'How many papers did K Zheng and Q Zhou write together in the DBLP citation network?', 'answer': 3}, {'qid': 'easy-dblp-0061', 'question': 'How many papers did Payam Ghassemi and Amir Behjat write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0062', 'question': 'How many papers did Tai-Quan Peng and Lun Zhang write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0063', 'question': 'How many papers did Joachim von Zitzewitz and Grégoire Courtine write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0064', 'question': 'How many papers did Jonathan Tremblay and Thomas Müller write together in the DBLP citation network?', 'answer': 5}, 
{'qid': 'easy-dblp-0065', 'question': 'How many papers did Sarah Fakhoury and Cole S. Peterson write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0066', 'question': 'How many papers did Jingqin Wang and Jingtao Wang write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0067', 'question': 'How many papers did Seungyong Moon and Hyun Oh Song write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0068', 'question': 'How many papers did Mansouri Farnam and Hamid R. Rabiee write together in the DBLP citation network?', 'answer': 1}, {'qid': 'easy-dblp-0069', 'question': 'How many papers did Roger Gassert and Lambercy Olivier write together in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0070', 'question': 'What papers did Annahita Amireskandari write in the DBLP citation network?', 'answer': 'Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, MultiSDGAN: Translation of OCT Images to Superresolved Segmentation Labels Using Multi-Discriminators in Multi-Stages, Gan-Based Super-Resolution and Segmentation of Retinal Layers in Optical Coherence Tomography Scans, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, MultiSDGAN: Translation of OCT Images to Superresolved Segmentation Labels Using Multi-Discriminators in Multi-Stages, Gan-Based Super-Resolution and Segmentation of Retinal Layers in Optical Coherence Tomography Scans, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, Superresolution and Segmentation of OCT Scans Using Multi-Stage Adversarial Guided Attention Training, MultiSDGAN: Translation of OCT Images to Superresolved Segmentation Labels Using Multi-Discriminators in Multi-Stages, Gan-Based Super-Resolution 
and Segmentation of Retinal Layers in Optical Coherence Tomography Scans'}, {'qid': 'easy-dblp-0071', 'question': 'What papers did Yan Zhang write in the DBLP citation network?', 'answer': 'Learning Motion Priors for 4D Human Body Capture in 3D Scenes., Learning Motion Priors for 4D Human Body Capture in 3D Scenes., Learning Motion Priors for 4D Human Body Capture in 3D Scenes., Learning Motion Priors for 4D Human Body Capture in 3D Scenes.'}, {'qid': 'easy-dblp-0072', 'question': 'What papers did Amine Ihamouten write in the DBLP citation network?', 'answer': 'Linking Degree Of Saturation With The Complex Dielectric Permittivity Of Limestone In A Gpr Frequency Band Using Svr, Linking Degree Of Saturation With The Complex Dielectric Permittivity Of Limestone In A Gpr Frequency Band Using Svr, Linking Degree Of Saturation With The Complex Dielectric Permittivity Of Limestone In A Gpr Frequency Band Using Svr'}, {'qid': 'easy-dblp-0073', 'question': 'What papers did Stefano Aldini write in the DBLP citation network?', 'answer': 'Computational Model of Robot Trust in Human Co-Worker for Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration, Prediction-Error Negativity to Assess Singularity Avoidance Strategies in Physical Human-Robot Collaboration'}, {'qid': 'easy-dblp-0074', 'question': 'What papers did Jun Zeng write in the DBLP citation network?', 'answer': 'Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained 
Obstacles., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Bayesian Optimization Meets Hybrid Zero Dynamics: Safe Parameter Learning for Bipedal Locomotion Control., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Bayesian Optimization Meets Hybrid Zero Dynamics: Safe Parameter Learning for Bipedal Locomotion Control., Autonomous Navigation for Quadrupedal Robots with Optimized Jumping through Constrained Obstacles., Bayesian Optimization Meets Hybrid Zero Dynamics: Safe Parameter Learning for Bipedal Locomotion Control.'}, {'qid': 'easy-dblp-0075', 'question': 'What papers did Dongdong Mu write in the DBLP citation network?', 'answer': 'Trajectory Tracking Control For Underactuated Unmanned Surface Vehicle Subject To Uncertain Dynamics And Input Saturation, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy, A Formation Autonomous Navigation System for Unmanned Surface Vehicles With Distributed Control Strategy'}, {'qid': 'easy-dblp-0076', 'question': 'What papers did Alessandro Brighente write in the DBLP citation network?', 'answer': 'Adaptive Coordinated Random Access For MTC With Correlated Traffic and Data Freshness, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications, Interference Prediction for Low-Complexity Link Adaptation in Beyond 5G Ultra-Reliable Low-Latency Communications'}, {'qid': 'easy-dblp-0077', 
'question': 'What papers did Christopher Yau write in the DBLP citation network?', 'answer': 'Basisdevae: Interpretable Simultaneous Dimensionality Reduction And Feature-Level Clustering With Derivative-Based Variational Autoencoders'}, {'qid': 'easy-dblp-0078', 'question': 'What papers did Veera Rajendran write in the DBLP citation network?', 'answer': 'Deep Learning based Crop Row Detection with Online Domain Adaptation, Deep Learning based Crop Row Detection with Online Domain Adaptation, Deep Learning based Crop Row Detection with Online Domain Adaptation'}, {'qid': 'easy-dblp-0079', 'question': 'What papers did Chenlu Shen write in the DBLP citation network?', 'answer': 'Generating Knowledge-Based Attentive User Representations for Sparse Interaction Recommendation, Generating Knowledge-Based Attentive User Representations for Sparse Interaction Recommendation'}, {'qid': 'easy-dblp-0080', 'question': 'How many papers did Erdogan Pekcan Erkan write in the DBLP citation network?', 'answer': 6}, {'qid': 'easy-dblp-0081', 'question': 'How many papers did Mucong Li write in the DBLP citation network?', 'answer': 5}, {'qid': 'easy-dblp-0082', 'question': 'How many papers did Liang Shi write in the DBLP citation network?', 'answer': 5}, {'qid': 'easy-dblp-0083', 'question': 'How many papers did Biplab Sikdar write in the DBLP citation network?', 'answer': 21}, {'qid': 'easy-dblp-0084', 'question': 'How many papers did Aakash Ahmad write in the DBLP citation network?', 'answer': 5}, {'qid': 'easy-dblp-0085', 'question': 'How many papers did Mainack Mondal write in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0086', 'question': 'How many papers did Joel M. 
Cooper write in the DBLP citation network?', 'answer': 2}, {'qid': 'easy-dblp-0087', 'question': 'How many papers did Yao Qin write in the DBLP citation network?', 'answer': 4}, {'qid': 'easy-dblp-0088', 'question': 'How many papers did Debarghya Ghoshdastidar write in the DBLP citation network?', 'answer': 2}, {'qid': 'easy-dblp-0089', 'question': 'How many papers did Reza Lotfidereshgi write in the DBLP citation network?', 'answer': 1}]\n"
     ]
    }
   ],
   "source": [
    "# Template: How many papers did {author_name} write in the DBLP citation network?\n",
    "#\n",
    "# The answer is the number of DISTINCT papers found on the author's co-author\n",
    "# edges. A paper written with several co-authors shows up on several edges, so\n",
    "# summing the per-edge list lengths would count that paper more than once.\n",
    "# NOTE(review): a paper that appears on no co-author edge is invisible here --\n",
    "# confirm whether author_net records single-author papers.\n",
    "\n",
    "# Build the candidate list once instead of on every retry.\n",
    "author_ids = list(id2author_dict.keys())\n",
    "\n",
    "for _ in range(num_question_per_template):\n",
    "    answer = 0\n",
    "    # Resample until the chosen author has at least one paper on an edge.\n",
    "    while answer == 0:\n",
    "        author_id = random.choice(author_ids)\n",
    "        author_name = id2author_dict[author_id]\n",
    "        question = \"How many papers did \" + author_name + \" write in the DBLP citation network?\"\n",
    "        written_papers = set()\n",
    "        for neighbour_id in author_net.neighbors(author_id):\n",
    "            written_papers.update(author_net[author_id][neighbour_id][\"papers\"])\n",
    "        answer = len(written_papers)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "# Show only the questions produced by this cell.\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-dblp-0090', 'question': 'What venue did TS Chua and X He collaborate most in the DBLP citation network?', 'answer': 'Annual Conference on Neural Information Processing Systems'}, {'qid': 'easy-dblp-0091', 'question': 'What venue did Eric F. Vermote and J.-C. Roger collaborate most in the DBLP citation network?', 'answer': 'IGARSS'}, {'qid': 'easy-dblp-0092', 'question': 'What venue did Per Jesper Sjöström and Rui Ponte Costa collaborate most in the DBLP citation network?', 'answer': 'PLOS COMPUTATIONAL BIOLOGY'}, {'qid': 'easy-dblp-0093', 'question': 'What venue did Robert M Roth and Brian MacWhinney collaborate most in the DBLP citation network?', 'answer': 'COMPUTER SPEECH AND LANGUAGE'}, {'qid': 'easy-dblp-0094', 'question': 'What venue did Chetna Singhal and Raja Datta collaborate most in the DBLP citation network?', 'answer': 'COMPUTER COMMUNICATIONS'}, {'qid': 'easy-dblp-0095', 'question': 'What venue did Kang Yang and Ziyi Zhou collaborate most in the DBLP citation network?', 'answer': '2021 IEEE/ACM 29th International Conference on Program Comprehension (ICPC)'}, {'qid': 'easy-dblp-0096', 'question': 'What venue did Hadi Daneshmand and Amir Joudaki collaborate most in the DBLP citation network?', 'answer': 'Annual Conference on Neural Information Processing Systems'}, {'qid': 'easy-dblp-0097', 'question': 'What venue did Juliane Fischer and Birgit Vogel-Heuser collaborate most in the DBLP citation network?', 'answer': 'IEEE ROBOTICS AND AUTOMATION LETTERS'}, {'qid': 'easy-dblp-0098', 'question': 'What venue did Penelope Collins and Dakuo Wang collaborate most in the DBLP citation network?', 'answer': 'COMPUTERS & EDUCATION'}, {'qid': 'easy-dblp-0099', 'question': 'What venue did Xiaohua Zhu and Hong Hong collaborate most in the DBLP citation network?', 'answer': 'IEEE Communications Letters'}]\n"
     ]
    }
   ],
   "source": [
    "# Template: What venue did {author1} and {author2} collaborate most in the DBLP citation network?\n",
    "#\n",
    "# Picks a random author, then a random co-author of that author, and answers\n",
    "# with the venue that occurs most often among the papers on their shared edge.\n",
    "\n",
    "# Build the candidate list once instead of on every retry.\n",
    "author_ids = list(id2author_dict.keys())\n",
    "\n",
    "for _ in range(num_question_per_template):\n",
    "    answer = None\n",
    "    # Resample until we find a pair with at least one shared paper.\n",
    "    while answer is None:\n",
    "        author_id1 = random.choice(author_ids)\n",
    "        author_name1 = id2author_dict[author_id1]\n",
    "        coauthor_ids = list(author_net.neighbors(author_id1))\n",
    "        if not coauthor_ids:\n",
    "            # No collaborators for this author; draw another one.\n",
    "            continue\n",
    "        author_id2 = random.choice(coauthor_ids)\n",
    "        author_name2 = id2author_dict[author_id2]\n",
    "        question = \"What venue did \" + author_name1 + \" and \" + author_name2 + \" collaborate most in the DBLP citation network?\"\n",
    "        venues = [\n",
    "            paper_net.nodes[paper][\"venue\"][\"raw\"]\n",
    "            for paper in author_net[author_id1][author_id2][\"papers\"]\n",
    "        ]\n",
    "        if venues:\n",
    "            # Iterate the list, not a set: on a tie max() keeps the venue seen\n",
    "            # first, so the result does not depend on string hash ordering.\n",
    "            answer = max(venues, key=venues.count)\n",
    "    questions.append({\"qid\": \"easy-dblp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "# Show only the questions produced by this cell.\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist every generated question, one JSON object per line.\n",
    "with jsonlines.open('/<YOUR_OWN_PATH>/ToolQA/data/questions/easy/dblp-easy.jsonl', 'w') as question_writer:\n",
    "    question_writer.write_all(questions)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
